package com.english.common;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Data;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Json;

/**
 * WebMagic {@link PageProcessor} that downloads one article from the Shanbay news API and
 * keeps the text found inside its {@code [CDATA[...]} sections.
 *
 * <p>The crawl is started from the constructor; the extracted text is stored in
 * {@code content}. Each non-empty section is emitted prefixed with {@code '#'}, so a body
 * holding the sections {@code a} and {@code b} yields {@code "#a#b"}.
 */
@Data
public class TextContentProcessor implements PageProcessor {

    /** Shared JSON parser; static, so Lombok generates no accessor for it. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Marker that opens a text section inside the article body. */
    private static final String CDATA_OPEN = "[CDATA[";

    /**
     * Value sent as the {@code auth_token} cookie.
     *
     * <p>NOTE(review): this is a credential committed to source control. It should be
     * rotated and loaded from configuration instead of being compiled into the class.
     */
    private static final String AUTH_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6MjUyNTg1OTM5LCJleHAiOjE2ODk5MjI5NDQsImV4cF92MiI6MTY4OTkyMjk0NCwiZGV2aWNlIjoiIiwidXNlcm5hbWUiOiJXZWNoYXRfOWI5ODUyY2U2YzYzZDg4MCIsImlzX3N0YWZmIjowLCJzZXNzaW9uX2lkIjoiMzA2MmU5OTgxZmI4MTFlZTg4M2ZlMmVhZDA3N2FlNDMifQ.1Kg_00QpCzfvCCxLm7DhIYLBpPKST-VDb8b8OAKxizQ";

    /** Extracted article text; the empty string until a page has been processed. */
    private String content;

    // Deliberately non-final so the Lombok-generated accessors keep their current shape
    // (presumably getSite() is what satisfies PageProcessor -- confirm against the
    // WebMagic version in use).
    private Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /**
     * Builds the article URL for {@code id}, attaches the auth cookie and runs the crawl.
     *
     * @param id article identifier inserted into the request URL
     */
    public TextContentProcessor(String id) {
        content = "";
        String textUrl = "https://apiv3.shanbay.com/news/articles/" + id + "?source=1";
        site.addCookie("auth_token", AUTH_TOKEN);
        Spider.create(this).addUrl(textUrl).thread(3).run();
    }

    /**
     * Parses the downloaded JSON and stores the text extracted from its {@code content}
     * field. A missing or non-text {@code content} field results in an empty string
     * rather than a {@link NullPointerException}.
     *
     * @param page the downloaded page
     * @throws RuntimeException if the page body is not valid JSON
     */
    @Override
    public void process(Page page) {
        Json json = page.getJson();
        // Parse the JSON payload.
        try {
            JsonNode root = OBJECT_MAPPER.readTree(String.valueOf(json));
            // get() yields null for an absent field; textValue() yields null for non-text nodes.
            JsonNode contentNode = (root == null) ? null : root.get("content");
            String rawContent = (contentNode == null) ? null : contentNode.textValue();
            this.content = initialContentToContent(rawContent);
        } catch (JsonProcessingException e) {
            throw new RuntimeException("Failed to parse article response as JSON", e);
        }
    }

    /**
     * Collects the text of every {@code [CDATA[...]} section in {@code content}.
     *
     * <p>A section runs from the marker to the next {@code ']'}. Empty sections are
     * skipped. A section with no closing {@code ']'} extends to the end of the input
     * instead of raising {@link StringIndexOutOfBoundsException}. Scanning resumes at the
     * terminator, so text that has already been extracted is not searched for markers again.
     *
     * @param content raw article body; may be {@code null}
     * @return the section texts, each prefixed with {@code '#'}, or {@code ""} if there are none
     */
    private String initialContentToContent(String content) {
        if (content == null || content.isEmpty()) {
            return "";
        }
        StringBuilder extracted = new StringBuilder();
        int markerAt = content.indexOf(CDATA_OPEN);
        while (markerAt >= 0) {
            int textStart = markerAt + CDATA_OPEN.length();
            int textEnd = content.indexOf(']', textStart);
            if (textEnd < 0) {
                // Unterminated section: keep whatever text remains.
                textEnd = content.length();
            }
            if (textEnd > textStart) {
                extracted.append('#').append(content, textStart, textEnd);
            }
            markerAt = content.indexOf(CDATA_OPEN, textEnd);
        }
        return extracted.toString();
    }

}
